#Alexander F. Gazmararian
#afg2@princeton.edu
#January 9, 2024

#Load packages
library(dplyr)
library(readxl)
library(stringr)
library(reshape2)
library(fuzzyjoin)
library(foreign)
library(tidycensus)
library(here)

#FIPS
fips <- tidycensus::fips_codes
fips$fips <- as.numeric(paste0(fips$state_code, fips$county_code))
county_fips <- subset(fips, select = c(fips, county, state))
names(county_fips)[2] <- "name"
state_fips <- read.table(here("data", "input", "eiagenerator", "state_fips_master.csv"), sep=",", header=TRUE)
# Clean state_fips
state_fips <- state_fips[,c("state_name","state_abbr","fips")]
# Create County FIP
county_fips_sub <- county_fips %>% mutate_at("name", str_replace, "County", "")
county_fips_sub$name <- trimws(county_fips_sub$name)


`%notin%` <- Negate(`%in%`)

match_fips <- function(StateAbb) {
  #subset to state
  county_fips_sub_state <- subset(county_fips_sub, state == StateAbb)
  plant_state <- subset(plant, State == StateAbb)
  #exact match
  merge1 <- merge(county_fips_sub_state,plant_state,by.x="name",by.y="County")
  #subset to left out
  leftout <- plant_state[plant_state$PLNTCODE %notin% merge1$PLNTCODE, ]
  if (nrow(leftout)>0) {
    print(leftout)
    #fuzzy match
    plantfuzzy <-
      stringdist_inner_join(county_fips_sub_state, leftout, 
                            by = list(x="name",y="County"),
                            max_dist = 3,
                            method = "lv"
      )
    plantfuzzy <- plantfuzzy[, names(merge1)]
    mergeout <- rbind(merge1,plantfuzzy)
    return(mergeout)
  }
  else {
    return(merge1)
  }
}

# 2000
# Read Files
gen <- read_xls(here("data", "input", "eiagenerator", "ExistingGenerators2000.xls"))
plant <- read_xls(here("data", "input", "eiagenerator", "Plant2000.xls"))

# Year, FIPS Columns
dt_2000 <- county_fips
dt_2000 <- dt_2000[complete.cases(dt_2000),]
dt_2000$Year <- "2000"
dt_2000 <- dt_2000[,-3]
colnames(dt_2000) <- c("FIPS","County","Year")

# Remove Unnecessary Columns in plant
plant <- plant %>%
  dplyr::rename(
    UTILCODE = EIA_UTILITY_CODE,
    PLNTCODE = PLANT_CODE...3,
    PLNTNAME = PLANT_CODE...4,
    CNTYNAME = COUNTY_NAME,
    PLNTSTATE = STATE
  )
plant <- plant[, c("UTILCODE","PLNTCODE","PLNTNAME","CNTYNAME","PLNTSTATE","PLANT_ZIP5")]
plant <- plant[complete.cases(plant),]
names(plant)[which(names(plant)=="PLNTSTATE")] <- "State"
names(plant)[which(names(plant)=="CNTYNAME")] <- "County"
plant <- subset(plant, PLANT_ZIP5 != 0 & County != "NOT IN FILE")
dim(plant)#3477

# Create County FIP
plant.out <- lapply(state.abb, match_fips)
plant <- do.call("rbind", plant.out)
names(plant)[which(names(plant)%in%c("state","name"))] <- c("PLNTSTATE","CNTYNAME")
plant$fips <- str_pad(plant$fips, width=5, side="left", pad="0")
plant <- plant[, c("fips","UTILCODE","PLNTCODE","PLNTNAME","CNTYNAME","PLNTSTATE","PLANT_ZIP5")]
colnames(plant) <- c("FIPS","UTILCODE","PLNTCODE","PLNTNAME","CNTYNAME","PLNTSTATE","PLANT_ZIP5")
saveRDS(plant, here("data", "inter", "eiagenerator", "crosswalk", "plant_fuzzy_2000.rds"))
plant <- plant[, c("FIPS","UTILCODE","PLNTCODE","PLNTNAME","CNTYNAME","PLNTSTATE")]

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(EXISTING_ENERGY_SOURCE_1 == "BFG" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "GAS" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "LNG" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "LPG" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "NG" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "RG" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "SNG" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "ANT" ~ "Coal",
                                             EXISTING_ENERGY_SOURCE_1 == "BIT" ~ "Coal",
                                             EXISTING_ENERGY_SOURCE_1 == "COL" ~ "Coal",
                                             EXISTING_ENERGY_SOURCE_1 == "CWM" ~ "Coal",
                                             EXISTING_ENERGY_SOURCE_1 == "LIG" ~ "Coal",
                                             EXISTING_ENERGY_SOURCE_1 == "SUB" ~ "Coal",
                                             EXISTING_ENERGY_SOURCE_1 == "SUN" ~ "Sun",
                                             EXISTING_ENERGY_SOURCE_1 == "WND" ~ "Wind"))
gen <- gen %>%
  dplyr::rename(
    UTILCODE = EIA_UTILITY_CODE,
    PLNTCODE = PLANT_CODE,
    INSVYEAR = OPERATING_MONTH,
    INSVMONTH = OPERATING_YEAR
  )
gen <- gen[,c("UTILCODE","PLNTCODE","INSVYEAR","INSVMONTH","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("UTILCODE","PLNTCODE"))
dt_ref <- dt_ref[complete.cases(dt_ref),]

# Defines dataset for identifying if there's any 
# plant in that county in that year, and 0 otherwise
dt_id <- dt_ref[,c("FIPS","TypeofFuel")]
dt_id <- distinct(dt_id)
dt_id$count <- 1
dt_id <- dcast(dt_id, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)
head(dt_id)
dt_2000 <- merge(dt_2000, dt_id, by='FIPS', all.x=TRUE)
dt_2000[is.na(dt_2000)] <- 0

# Defines dataset for identifying if there's a
# new plant in that county in that year, and 0 otherwise
dt_new <- dt_ref[,c("FIPS","TypeofFuel", "INSVYEAR")]
dt_new <- subset(dt_new, INSVYEAR%in%c("2000","1999","1998","1997"))
dt_new$count <- 1
dt_new <- dcast(dt_new, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)
head(dt_new)
dim(dt_new)
colnames(dt_new) <- c("FIPS","NewCoal", "NewGas", "NewSun", "NewWind")

dt_2000 <- merge(dt_2000, dt_new, by='FIPS', all.x=TRUE)
dt_2000[is.na(dt_2000)] <- 0

saveRDS(dt_2000, here("data", "inter", "eiagenerator", "2000.rds"))
rm(list = c("dt_2000", "dt_id", "dt_new", "dt_ref", "gen", "plant"))
gc()


#2004
# Read Files
gen <- read_excel(here("data", "input", "eiagenerator", "GenY04.xls"))
plant <- read_excel(here("data", "input", "eiagenerator", "PlantY04.xls"))

# Year, FIPS Columns
dt_2004 <- county_fips
dt_2004 <- dt_2004[complete.cases(dt_2004),]
dt_2004$Year <- "2004"
dt_2004 <- dt_2004[,-3]
colnames(dt_2004) <- c("FIPS","County","Year")

# Remove Unnecessary Columns in plant
plant <- plant[, c("UTILCODE","PLNTCODE","PLNTNAME","CNTYNAME","STATE","PLNTZIP")]
plant <- plant[complete.cases(plant),]
names(plant)[which(names(plant)=="STATE")] <- "State"
names(plant)[which(names(plant)=="CNTYNAME")] <- "County"
plant <- subset(plant, PLNTZIP != 0 & County != "NOT IN FILE")
dim(plant)#5858

# Create County FIP
plant.out <- lapply(state.abb, match_fips)
plant <- do.call("rbind", plant.out)
dim(plant)#5648
names(plant)[which(names(plant)%in%c("state","name"))] <- c("STATE","CNTYNAME")

# Create County FIP
plant$fips <- str_pad(plant$fips, width=5, side="left", pad="0")
plant <- plant[, c("fips","UTILCODE","PLNTCODE","PLNTNAME","CNTYNAME","STATE","PLNTZIP")]
colnames(plant) <-c("FIPS","UTILCODE","PLNTCODE","PLNTNAME","CNTYNAME","STATE","PLNTZIP")
saveRDS(plant, here("data", "inter", "eiagenerator", "crosswalk", "plant_fuzzy_2004.rds"))
plant <- plant[, c("FIPS","UTILCODE","PLNTCODE","PLNTNAME","CNTYNAME","STATE")]

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(ENERGY_SOURCE_1 == "BFG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "GAS" ~ "Gas",
                                             ENERGY_SOURCE_1 == "LNG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "LPG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "NG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "RG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "SNG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "ANT" ~ "Coal",
                                             ENERGY_SOURCE_1 == "BIT" ~ "Coal",
                                             ENERGY_SOURCE_1 == "COL" ~ "Coal",
                                             ENERGY_SOURCE_1 == "CWM" ~ "Coal",
                                             ENERGY_SOURCE_1 == "LIG" ~ "Coal",
                                             ENERGY_SOURCE_1 == "SUB" ~ "Coal",
                                             ENERGY_SOURCE_1 == "SUN" ~ "Sun",
                                             ENERGY_SOURCE_1 == "WND" ~ "Wind"))
gen <- gen[,c("UTILCODE","PLNTCODE","INSVYEAR","INSVMONTH","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("UTILCODE","PLNTCODE"))
dt_ref <- dt_ref[complete.cases(dt_ref),]

# Defines dataset for identifying if there's any 
# plant in that county in that year, and 0 otherwise
dt_id <- dt_ref[,c("FIPS","TypeofFuel")]
dt_id <- distinct(dt_id)
dt_id$count <- 1
dt_id <- dcast(dt_id, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)

dt_2004 <- merge(dt_2004, dt_id, by='FIPS', all.x=TRUE)
dt_2004[is.na(dt_2004)] <- 0

# Defines dataset for identifying if there's a
# new plant in that county in that year, and 0 otherwise
dt_new <- dt_ref[,c("FIPS","TypeofFuel", "INSVYEAR")]
dt_new <- subset(dt_new, INSVYEAR%in%c("2004","2003","2002","2001"))
dt_new$count <- 1
dt_new <- dcast(dt_new, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)
colnames(dt_new) <- c("FIPS","NewCoal","NewGas","NewSun", "NewWind")

dt_2004 <- merge(dt_2004, dt_new, by='FIPS', all.x=TRUE)
dt_2004[is.na(dt_2004)] <- 0

saveRDS(dt_2004, here("data", "inter", "eiagenerator", "2004.rds"))
rm(list=c("dt_2004", "dt_id", "dt_new", "dt_ref", "gen", "plant"))
gc()


# 2008

# Read Files
gen <- read_excel(here("data", "input", "eiagenerator", "GenY08.xls"))
plant <- read_excel(here("data", "input", "eiagenerator", "PlantY08.xls"))

# Year, FIPS Columns
dt_2008 <- county_fips
dt_2008 <- dt_2008[complete.cases(dt_2008),]
dt_2008$Year <- "2008"
dt_2008 <- dt_2008[,-3]
colnames(dt_2008) <- c("FIPS","County","Year")

# Remove Unnecessary Columns in plant
plant <- plant[, c("UTILCODE","PLNTCODE","PLNTNAME","COUNTY","STATE","ZIP5")]
plant <- plant[complete.cases(plant),]
names(plant)[which(names(plant)=="STATE")] <- "State"
names(plant)[which(names(plant)=="COUNTY")] <- "County"
plant <- subset(plant, ZIP5 != 0 & County != "NOT IN FILE")
dim(plant)#6562

# Create County FIP
plant.out <- lapply(state.abb, match_fips)
plant <- do.call("rbind", plant.out)
dim(plant)#5648
names(plant)[which(names(plant)%in%c("state","name"))] <- c("STATE","COUNTY")

# Create County FIP
plant$fips <- str_pad(plant$fips, width=5, side="left", pad="0")
plant <- plant[, c("fips","UTILCODE","PLNTCODE","PLNTNAME","COUNTY","STATE","ZIP5")]
colnames(plant) <-c("FIPS","UTILCODE","PLNTCODE","PLNTNAME","COUNTY","STATE","ZIP5")
saveRDS(plant, here("data", "inter", "eiagenerator", "crosswalk", "plant_fuzzy_2008.rds"))
plant <- plant[, c("FIPS","UTILCODE","PLNTCODE","PLNTNAME","COUNTY","STATE")]

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(ENERGY_SOURCE_1 == "BFG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "GAS" ~ "Gas",
                                             ENERGY_SOURCE_1 == "LNG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "LPG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "NG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "RG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "SNG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "ANT" ~ "Coal",
                                             ENERGY_SOURCE_1 == "BIT" ~ "Coal",
                                             ENERGY_SOURCE_1 == "COL" ~ "Coal",
                                             ENERGY_SOURCE_1 == "CWM" ~ "Coal",
                                             ENERGY_SOURCE_1 == "LIG" ~ "Coal",
                                             ENERGY_SOURCE_1 == "SUB" ~ "Coal",
                                             ENERGY_SOURCE_1 == "SUN" ~ "Sun",
                                             ENERGY_SOURCE_1 == "WND" ~ "Wind"))
gen <- gen[,c("UTILCODE","PLNTCODE","OPERATING_YEAR","OPERATING_MONTH","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("UTILCODE","PLNTCODE"))
dt_ref <- dt_ref[complete.cases(dt_ref),]

# Defines dataset for identifying if there's any 
# plant in that county in that year, and 0 otherwise
dt_id <- dt_ref[,c("FIPS","TypeofFuel")]
dt_id <- distinct(dt_id)
dt_id$count <- 1
dt_id <- dcast(dt_id, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)

dt_2008 <- merge(dt_2008, dt_id, by='FIPS', all.x=TRUE)
dt_2008[is.na(dt_2008)] <- 0

# Defines dataset for identifying if there's a
# new plant in that county in that year, and 0 otherwise
dt_new <- dt_ref[,c("FIPS","TypeofFuel", "OPERATING_YEAR")]
#dt_new <- subset(dt_new, OPERATING_YEAR=='2008')
dt_new <- subset(dt_new, OPERATING_YEAR%in%c("2008","2007","2006","2005"))
dt_new$count <- 1
dt_new <- dcast(dt_new, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)
colnames(dt_new) <- c("FIPS","NewCoal","NewGas","NewSun","NewWind")

dt_2008 <- merge(dt_2008, dt_new, by='FIPS', all.x=TRUE)
dt_2008[is.na(dt_2008)] <- 0

saveRDS(dt_2008, here("data", "inter", "eiagenerator", "2008.rds"))
rm(list = c("dt_2008", "dt_id", "dt_new", "dt_ref", "gen", "plant"))
gc()

# 2012----
# Read Files
gen <- read_excel(here("data", "input", "eiagenerator", "GeneratorY2012.xlsx"), skip = 1)
plant <- read_excel(here("data", "input", "eiagenerator", "PlantY2012.xlsx"), skip = 1)

# Year, FIPS Columns
dt_2012 <- county_fips
dt_2012 <- dt_2012[complete.cases(dt_2012),]
dt_2012$Year <- "2012"
dt_2012 <- dt_2012[,-3]
colnames(dt_2012) <- c("FIPS","County","Year")

# Remove Unnecessary Columns in plant
plant <- plant[, c("Utility ID","Plant Code","Plant Name","County","State", "Zip")]
plant <- plant[complete.cases(plant),]
plant <- subset(plant, Zip != 0 & County != "NOT IN FILE")
names(plant)[which(names(plant)=="Plant Code")] <- "PLNTCODE"
dim(plant)#7263

# Create County FIP
plant.out <- lapply(state.abb, match_fips)
plant <- do.call("rbind", plant.out)
dim(plant)#5648
names(plant)[which(names(plant)%in%c("PLNTCODE","name","state"))] <- c("County","State","Plant Code")
plant$fips <- str_pad(plant$fips, width=5, side="left", pad="0")
plant <- plant[, c("fips","Utility ID","Plant Code","Plant Name","County","State", "Zip")]
colnames(plant) <-c("FIPS","Utility ID","Plant Code","Plant Name","County","State", "Zip")
saveRDS(plant, here("data", "inter", "eiagenerator", "crosswalk", "plant_fuzzy_2012.rds"))
plant <- plant[, c("FIPS","Utility ID","Plant Code","Plant Name","County","State")]

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(`Energy Source 1` == "BFG" ~ "Gas",
                                             `Energy Source 1` == "GAS" ~ "Gas",
                                             `Energy Source 1` == "LNG" ~ "Gas",
                                             `Energy Source 1` == "LPG" ~ "Gas",
                                             `Energy Source 1` == "NG" ~ "Gas",
                                             `Energy Source 1` == "RG" ~ "Gas",
                                             `Energy Source 1` == "SNG" ~ "Gas",
                                             `Energy Source 1` == "ANT" ~ "Coal",
                                             `Energy Source 1` == "BIT" ~ "Coal",
                                             `Energy Source 1` == "COL" ~ "Coal",
                                             `Energy Source 1` == "CWM" ~ "Coal",
                                             `Energy Source 1` == "LIG" ~ "Coal",
                                             `Energy Source 1` == "SUB" ~ "Coal",
                                             `Energy Source 1` == "SUN" ~ "Sun",
                                             `Energy Source 1` == "WND" ~ "Wind"))
gen <- gen[,c("Utility ID","Plant Code","Operating Year","Operating Month","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("Utility ID","Plant Code"))
dt_ref <- dt_ref[complete.cases(dt_ref),]

# Defines dataset for identifying if there's any 
# plant in that county in that year, and 0 otherwise
dt_id <- dt_ref[,c("FIPS","TypeofFuel")]
dt_id <- distinct(dt_id)
dt_id$count <- 1
dt_id <- dcast(dt_id, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)

dt_2012 <- merge(dt_2012, dt_id, by='FIPS', all.x=TRUE)
dt_2012[is.na(dt_2012)] <- 0

# Defines dataset for identifying if there's a
# new plant in that county in that year, and 0 otherwise
dt_new <- dt_ref[,c("FIPS","TypeofFuel", "Operating Year")]
dt_new <- subset(dt_new, `Operating Year`%in%c("2012","2011","2010","2009"))
dt_new$count <- 1
dt_new <- dcast(dt_new, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)
colnames(dt_new) <- c("FIPS","NewCoal","NewGas","NewSun","NewWind")

dt_2012 <- merge(dt_2012, dt_new, by='FIPS', all.x=TRUE)
dt_2012[is.na(dt_2012)] <- 0
saveRDS(dt_2012, here("data", "inter", "eiagenerator", "2012.rds"))
rm(list = c("dt_2012", "dt_id", "dt_new", "dt_ref", "gen", "plant"))
gc()


#2016
# Read Files
gen <- read_excel(here("data", "input", "eiagenerator", "3_1_Generator_Y2016.xlsx"),skip=1)
plant <- read_excel(here("data", "input", "eiagenerator", "2___Plant_Y2016.xlsx"),skip=1)

# Year, FIPS Columns
dt_2016 <- county_fips
dt_2016 <- dt_2016[complete.cases(dt_2016),]
dt_2016$Year <- "2016"
dt_2016 <- dt_2016[,-3]
colnames(dt_2016) <- c("FIPS","County","Year")

# Remove Unnecessary Columns in plant
head(plant)
plant <- plant[, c("Utility ID","Plant Code","Plant Name","County","State", "Zip")]
plant <- plant[complete.cases(plant),]
plant <- subset(plant, Zip != 0 & County != "NOT IN FILE")
names(plant)[which(names(plant)=="Plant Code")] <- "PLNTCODE"
dim(plant)#12604

# Create County FIP
plant.out <- lapply(state.abb, match_fips)
plant <- do.call("rbind", plant.out)
dim(plant)#12317
names(plant)[which(names(plant)%in%c("PLNTCODE","name","state"))] <- c("County","State", "Plant Code")
plant$fips <- str_pad(plant$fips, width=5, side="left", pad="0")
plant <- plant[, c("fips","Utility ID","Plant Code","Plant Name","County","State", "Zip")]
colnames(plant) <-c("FIPS","Utility ID","Plant Code","Plant Name","County","State", "Zip")
saveRDS(plant, here("data", "inter", "eiagenerator", "crosswalk", "plant_fuzzy_2016.rds"))
plant <- plant[, c("FIPS","Utility ID","Plant Code","Plant Name","County","State")]

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(`Energy Source 1` == "BFG" ~ "Gas",
                                             `Energy Source 1` == "GAS" ~ "Gas",
                                             `Energy Source 1` == "LNG" ~ "Gas",
                                             `Energy Source 1` == "LPG" ~ "Gas",
                                             `Energy Source 1` == "NG" ~ "Gas",
                                             `Energy Source 1` == "RG" ~ "Gas",
                                             `Energy Source 1` == "SNG" ~ "Gas",
                                             `Energy Source 1` == "ANT" ~ "Coal",
                                             `Energy Source 1` == "BIT" ~ "Coal",
                                             `Energy Source 1` == "COL" ~ "Coal",
                                             `Energy Source 1` == "CWM" ~ "Coal",
                                             `Energy Source 1` == "LIG" ~ "Coal",
                                             `Energy Source 1` == "SUB" ~ "Coal",
                                             `Energy Source 1` == "SUN" ~ "Sun",
                                             `Energy Source 1` == "WND" ~ "Wind"))
gen <- gen[,c("Utility ID","Plant Code","Operating Year","Operating Month","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("Utility ID","Plant Code"))
dt_ref <- dt_ref[complete.cases(dt_ref),]

# Defines dataset for identifying if there's any 
# plant in that county in that year, and 0 otherwise
dt_id <- dt_ref[,c("FIPS","TypeofFuel")]
dt_id <- distinct(dt_id)
dt_id$count <- 1
dt_id <- dcast(dt_id, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)

dt_2016 <- merge(dt_2016, dt_id, by='FIPS', all.x=TRUE)
dt_2016[is.na(dt_2016)] <- 0

# Defines dataset for identifying if there's a
# new plant in that county in that year, and 0 otherwise
dt_new <- dt_ref[,c("FIPS","TypeofFuel", "Operating Year")]
dt_new <- subset(dt_new, `Operating Year`%in%c("2016","2015","2014","2013"))
dt_new$count <- 1
dt_new <- dcast(dt_new, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)
colnames(dt_new) <- c("FIPS","NewGas","NewSun","NewWind")
dt_new$NewCoal <- 0
dt_new <- dt_new[,c(1,5,2,3,4)]

dt_2016 <- merge(dt_2016, dt_new, by='FIPS', all.x=TRUE)
dt_2016[is.na(dt_2016)] <- 0
dim(dt_2016)
saveRDS(dt_2016, here("data", "inter", "eiagenerator", "2016.rds"))
rm(list=c("dt_2016", "dt_id", "dt_new", "dt_ref", "gen", "plant"))
gc()


# 2020

# Read Files
gen <- read_excel(here("data", "input", "eiagenerator", "3_1_Generator_Y2020.xlsx"))
plant <- read_excel(here("data", "input", "eiagenerator", "2___Plant_Y2020.xlsx"))

# Year, FIPS Columns
dt_2020 <- county_fips
dt_2020 <- dt_2020[complete.cases(dt_2020),]
dt_2020$Year <- "2020"
dt_2020 <- dt_2020[,-3]
colnames(dt_2020) <- c("FIPS","County","Year")

# Remove Unnecessary Columns in plant
head(plant)
plant <- plant[, c("Utility ID","Plant Code","Plant Name","County","State", "Zip")]
plant <- plant[complete.cases(plant),]
plant <- subset(plant, Zip != 0 & County != "NOT IN FILE")
names(plant)[which(names(plant)=="Plant Code")] <- "PLNTCODE"
dim(plant)#12604

# Create County FIP
plant.out <- lapply(state.abb, match_fips)
plant <- do.call("rbind", plant.out)
dim(plant)#12317
names(plant)[which(names(plant)%in%c("PLNTCODE","name","state"))] <- c("County","State", "Plant Code")
plant$fips <- str_pad(plant$fips, width=5, side="left", pad="0")
plant <- plant[, c("fips","Utility ID","Plant Code","Plant Name","County","State","Zip")]
colnames(plant) <-c("FIPS","Utility ID","Plant Code","Plant Name","County","State","Zip")
saveRDS(plant, here("data", "inter", "eiagenerator", "crosswalk", "plant_fuzzy_2020.rds"))
plant <- plant[, c("FIPS","Utility ID","Plant Code","Plant Name","County","State")]

# Gas, Wind, or Solar?
head(gen)
gen <- gen %>% mutate(TypeofFuel = case_when(`Energy Source 1` == "BFG" ~ "Gas",
                                             `Energy Source 1` == "GAS" ~ "Gas",
                                             `Energy Source 1` == "LNG" ~ "Gas",
                                             `Energy Source 1` == "LPG" ~ "Gas",
                                             `Energy Source 1` == "NG" ~ "Gas",
                                             `Energy Source 1` == "RG" ~ "Gas",
                                             `Energy Source 1` == "SNG" ~ "Gas",
                                             `Energy Source 1` == "ANT" ~ "Coal",
                                             `Energy Source 1` == "BIT" ~ "Coal",
                                             `Energy Source 1` == "COL" ~ "Coal",
                                             `Energy Source 1` == "CWM" ~ "Coal",
                                             `Energy Source 1` == "LIG" ~ "Coal",
                                             `Energy Source 1` == "SUB" ~ "Coal",
                                             `Energy Source 1` == "SUN" ~ "Sun",
                                             `Energy Source 1` == "WND" ~ "Wind"))
gen <- gen[,c("Utility ID","Plant Code","Operating Year","Operating Month","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("Utility ID","Plant Code"))
dt_ref <- dt_ref[complete.cases(dt_ref),]

# Defines dataset for identifying if there's any 
# plant in that county in that year, and 0 otherwise
dt_id <- dt_ref[,c("FIPS","TypeofFuel")]
dt_id <- distinct(dt_id)
dt_id$count <- 1
dt_id <- dcast(dt_id, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)

dt_2020 <- merge(dt_2020, dt_id, by='FIPS', all.x=TRUE)
dt_2020[is.na(dt_2020)] <- 0

# Defines dataset for identifying if there's a
# new plant in that county in that year, and 0 otherwise
dt_new <- dt_ref[,c("FIPS","TypeofFuel", "Operating Year")]
dt_new <- subset(dt_new, `Operating Year`%in%c("2020","2019","2018","2017"))
dt_new$count <- 1
dt_new <- dcast(dt_new, FIPS ~ TypeofFuel, value.var="count", fun.aggregate = mean)
colnames(dt_new) <- c("FIPS","NewGas","NewSun","NewWind")
dt_new$NewCoal <- 0
dt_new <- dt_new[,c(1,5,2,3,4)]

dt_2020 <- merge(dt_2020, dt_new, by='FIPS', all.x=TRUE)
dt_2020[is.na(dt_2020)] <- 0

saveRDS(dt_2020, here("data", "inter", "eiagenerator", "2020.rds"))
rm(list = c("dt_2020", "dt_id", "dt_new", "dt_ref", "gen", "plant"))
gc()
